home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
SGI Freeware 2002 November
/
SGI Freeware 2002 November - Disc 2.iso
/
dist
/
fw_glimpse.idb
/
usr
/
freeware
/
src
/
glimpse-3.0
/
libtemplate
/
template
/
print-urlrefs.c.z
/
print-urlrefs.c
Wrap
C/C++ Source or Header
|
1997-09-09
|
5KB
|
130 lines
static char rcsid[] = "$Id: print-urlrefs.c,v 1.9 1995/01/10 16:30:39 hardy Exp $";
/*
* print-urlrefs - Reads SOIF and prints normalized URLs from the
* URL-References attribute. Used to extract URLs from HTML object sums.
*
* Usage: print-urlrefs
*
* Darren Hardy, hardy@cs.colorado.edu, July 1994
*
* ----------------------------------------------------------------------
* Copyright (c) 1994, 1995. All rights reserved.
*
* Mic Bowman of Transarc Corporation.
* Peter Danzig of the University of Southern California.
* Darren R. Hardy of the University of Colorado at Boulder.
* Udi Manber of the University of Arizona.
* Michael F. Schwartz of the University of Colorado at Boulder.
*
* This copyright notice applies to all code in Harvest other than
* subsystems developed elsewhere, which contain other copyright notices
* in their source text.
*
* The Harvest software was developed by the Internet Research Task
* Force Research Group on Resource Discovery (IRTF-RD). The Harvest
* software may be used for academic, research, government, and internal
* business purposes without charge. If you wish to sell or distribute
* the Harvest software to commercial clients or partners, you must
* license the software. See
* http://harvest.cs.colorado.edu/harvest/copyright,licensing.html#licensing.
*
* The Harvest software is provided ``as is'', without express or
* implied warranty, and with no support nor obligation to assist in its
* use, correction, modification or enhancement. We assume no liability
* with respect to the infringement of copyrights, trade secrets, or any
* patents, and are not responsible for consequential damages. Proper
* use of the Harvest software is entirely the responsibility of the user.
*
* For those who are using Harvest for non-commercial purposes, you may
* make derivative works, subject to the following constraints:
*
* - You must include the above copyright notice and these accompanying
* paragraphs in all forms of derivative works, and any documentation
* and other materials related to such distribution and use acknowledge
* that the software was developed at the above institutions.
*
* - You must notify IRTF-RD regarding your distribution of the
* derivative work.
*
* - You must clearly notify users that your are distributing a modified
* version and not the original Harvest software.
*
* - Any derivative product is also subject to the restrictions of the
* copyright, including distribution and use limitations.
*/
#include <stdio.h>
#include <string.h>
#include <time.h>
#include "util.h"
#include "url.h"
#include "template.h"
/*
 * print_urlrefs - Prints to stdout, one per line, every usable URL found
 * in the "URL-References" attribute of template t.
 *
 * The attribute value is split on newlines and each entry is handled as:
 *   - entries containing '=', ' ' or '<', entries starting with "mailto:",
 *     and "http:" entries lacking "://" are skipped;
 *   - entries containing "://" are taken as-is;
 *   - entries starting with '/' are appended to the "scheme://host"
 *     prefix of t->url;
 *   - anything else is resolved against t->url with its last path
 *     component removed.
 * The candidate is passed to url_open(); when that succeeds the up->url
 * string it provides is printed and the URL is closed again.
 *
 * Candidates that would not fit in the local buffer are dropped rather
 * than truncated.  strtok() splits avp->value in place; t->url itself is
 * only read.
 */
static void print_urlrefs(t)
Template *t;
{
	AVPair *avp;
	URL *up;
	char *s, url[BUFSIZ];
	const char *base, *sep, *path, *slash;
	size_t root_len = 0, dir_len = 0, ref_len;
	int have_root = 0;

	if ((avp = extract_AVPair(t->list, "URL-References")) == NULL)
		return;
	if (avp->value == NULL)
		return;

	/*
	 * Measure the two prefixes of t->url that relative references need:
	 *   root_len - length of "scheme://host" (for references starting with '/')
	 *   dir_len  - length up to, not including, the last '/' of the path
	 */
	base = t->url;
	if (base != NULL) {
		sep = strstr(base, "://");
		path = (sep != NULL) ? strchr(sep + 3, '/') : NULL;
		if (sep != NULL) {
			have_root = 1;
			root_len = (path != NULL) ? (size_t) (path - base) : strlen(base);
		}
		/*
		 * With a scheme but no path ("http://host") the only slashes
		 * belong to "://", so the whole URL is the directory.
		 */
		if (sep != NULL && path == NULL)
			slash = NULL;
		else
			slash = strrchr((path != NULL) ? path : base, '/');
		dir_len = (slash != NULL) ? (size_t) (slash - base) : strlen(base);
	}

	/* For each line in the data, grab the URL */
	for (s = strtok(avp->value, "\n"); s != NULL; s = strtok(NULL, "\n")) {
		url[0] = '\0';

		/* Remove poorly formatted lines */
		if (strchr(s, '=') || strchr(s, ' ') || strchr(s, '<'))
			continue;
		/* Compare all 7 characters so the ':' is part of the match */
		if (strncmp(s, "mailto:", 7) == 0)
			continue;
		if (strncmp(s, "http:", 5) == 0 && !strstr(s, "://"))
			continue;

		ref_len = strlen(s);
		if (strstr(s, "://") != NULL) {
			/* Already absolute: keep it as-is if it fits */
			if (ref_len >= sizeof(url))
				continue;
			strcpy(url, s);
		} else if (s[0] == '/') {
			/* Relative to the top of t->url: "scheme://host" + s */
			if (!have_root || root_len + ref_len >= sizeof(url))
				continue;
			memcpy(url, base, root_len);
			strcpy(url + root_len, s);
		} else {
			/* Relative to the directory of t->url: dir + "/" + s */
			if (base == NULL || dir_len + 1 + ref_len >= sizeof(url))
				continue;
			memcpy(url, base, dir_len);
			url[dir_len] = '/';
			strcpy(url + dir_len + 1, s);
		}

		/* If the URL is set, then parse it and print if ok */
		if (url[0] != '\0' && (up = url_open(url)) != NULL) {
			printf("%s\n", up->url);
			url_close(up);
		}
	}
}
/*
 * main - Reads templates from stdin until parse_template() returns NULL.
 * For each template it prints the URLs from its URL-References attribute
 * (via print_urlrefs), then the template's own url, then frees it.
 * argc and argv are not used.  Always returns 0.
 */
int main(argc, argv)
int argc;
char *argv[];
{
	Template *template;

	init_parse_template_file(stdin);
	while ((template = parse_template()) != NULL) {
		print_urlrefs(template);
		printf("%s\n", template->url);
		free_template(template);
	}
	finish_parse_template();

	/* Returning from main is equivalent to exit(0) and needs no <stdlib.h> */
	return 0;
}